library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0     ✔ purrr   0.2.5
## ✔ tibble  1.4.2     ✔ dplyr   0.7.8
## ✔ tidyr   0.8.2     ✔ stringr 1.3.1
## ✔ readr   1.3.0     ✔ forcats 0.3.0
## ── Conflicts ───────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

Organize and clean data

# List all CSV response files in the "responses" folder
# (these are plain-text CSV files, not Excel files)
files <- list.files(path = "responses", pattern = "\\.csv$")

# Read each file into a list of data frames. read.table() warns when the
# last line lacks a trailing newline; that warning is harmless here, so it
# is suppressed deliberately.
datalist <- lapply(
  file.path("responses", files),
  function(x) suppressWarnings(read.table(x, header = FALSE))
)

# Longest record across all files, used below to pad the others with NA
max_length <- max(vapply(datalist, length, integer(1)))

# Pad every parsed record out to max_length entries with NA so the list can
# later be row-bound into a rectangular data frame.
df_filled <- lapply(datalist, function(x) {
  # length.out spelled in full: the original's `length=` relied on partial
  # argument matching; seq_along() is also safe for zero-length input
  padded <- rep(NA, length.out = max_length)
  padded[seq_along(x)] <- x
  padded
})

# Column names for the fixed leading fields of every response file
colnamelist <- c("ID", "length", "response", "effect_size_abs", "effect_size",
                 "true_mean1", "true_mean2", "obs_mean_1", "obs_mean2",
                 "obs_mean_dif", "df", "tvalue", "pvalue", "obs_power", "d")

# Combine the padded records into one data frame, coerce everything to
# numeric, and name the fixed columns. The trailing raw observations
# (every column after the named ones) are collapsed into a nested
# list-column so each row keeps its observation stream.
dat <- do.call(rbind, df_filled) %>%
  as.data.frame() %>%
  mutate_all(as.numeric) %>%
  # derive positions from colnamelist instead of hard-coding 1:15
  rename_at(seq_along(colnamelist), ~ colnamelist) %>%
  # deal with extra values on end of each line: gather everything that is
  # NOT one of the named columns (equivalent to the old 16:ncol(.))
  gather(key, val, -one_of(colnamelist)) %>%
  filter(!is.na(val)) %>%
  select(-key) %>%
  group_by_at(vars(one_of(colnamelist))) %>%
  nest()

Create variables

# Add derived judgment variables:
#   significant  - 1 when the trial's p-value is at or below .05
#   response     - recoded so judgment 2 ("negative") becomes -1
#   right_answer - the sign of the true effect size (-1, 0, or 1)
#   correct      - 1 when the judgment matches the true direction
all_data <- dat %>%
  mutate(
    significant = as.numeric(pvalue <= 0.05),
    response = recode(response, `0` = 0, `1` = 1, `2` = -1),
    right_answer = sign(effect_size),
    correct = as.numeric(response == right_answer)
  )

# Keep only trials where the participant saw more than 5 responses
all_data_sub <- all_data %>% filter(length > 5)
# calculate percents for each judgment as sig for each effect_size
# for use in plots below
#
# For each (right_answer, effect_size) cell:
#   pcnt_* - proportion of trials judged zero (z), positive (p), negative (n)
#   sig_*  - among trials with that judgment, proportion reaching p < .05
#            (0/0 yields NaN when a judgment never occurred; recoded below)
# The gather/separate/spread round-trip reshapes the six summary columns
# into one row per (effect size, response) with `pcnt` and `sig` columns.
sub_pcnt <- all_data_sub %>%
  group_by(right_answer, effect_size) %>%
  summarise(
    pcnt_z = mean(response == 0),
    pcnt_p = mean(response == 1),
    pcnt_n = mean(response == -1),
    sig_z = sum(response == 0 & pvalue < .05)/sum(response == 0),
    sig_p = sum(response == 1 & pvalue < .05)/sum(response == 1),
    sig_n = sum(response == -1 & pvalue < .05)/sum(response == -1)
  ) %>%
  gather(key, val, pcnt_z:sig_n) %>%
  separate(key, c("key", "response")) %>%
  spread(key, val) %>%
  mutate(
    # map the letter suffix back onto the numeric response codes
    response = recode(response, "z" = 0, "p" = 1, "n" = -1),
    pcnt = round(pcnt, 2),
    # NaN arose from 0/0 above (judgment never given in that cell)
    sig = round(ifelse(is.nan(sig), 0, sig), 3),
    correct = right_answer == response
  )

Plots

Observed mean difference by effect size and response

  # Histograms of observed mean differences, faceted by response x effect
  # size. Background shading (geom_rect) marks whether the panel's response
  # matches the true direction; the red line is the true effect size; the
  # text label is the proportion of trials in that cell (pcnt from sub_pcnt).
  # Layer order matters: the rect must be drawn before the histogram.
  ggplot() +
    geom_rect(data = sub_pcnt,
              aes(fill = correct),
              xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, 
              alpha = 0.3, show.legend = FALSE) +
    geom_vline(data = all_data_sub, aes(xintercept = effect_size), color = "red") +
    geom_histogram(data = all_data_sub, aes(x=obs_mean_dif), 
                   binwidth = 0.1, color = "black", fill = "white") + 
    geom_text(data = sub_pcnt, aes(label = pcnt), x = -1.5, y = 25) +
    facet_grid(response~effect_size, labeller = label_both) +
    theme_bw() +
    scale_fill_manual(values = c("white", "grey50"))

ggsave("obs_mean_diff.png", width = 16, height = 10)

P-values by effect size and response

# P-value histograms faceted by response x effect size. Shading again marks
# correct vs incorrect panels; the text label is the percentage of trials in
# the cell that reached p < .05 (sig from sub_pcnt).
ggplot() +
  geom_rect(data = sub_pcnt, aes(fill = correct),
              xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, 
              alpha = 0.3, show.legend = FALSE) +
  geom_histogram(data = all_data_sub, aes(x=pvalue),
                 binwidth = 0.05, boundary = 0, 
                 color = "black", fill = "white", alpha = 0.5) + 
  geom_text(data = sub_pcnt, aes(label = paste("%sig =", sig*100)), x = .2, y = 50) +
  facet_grid(response ~ effect_size, labeller = label_both) +
  theme_bw() +
  scale_fill_manual(values = c("white", "grey50"))

ggsave("power.png", width = 16, height = 10)

Actual and observed d and power by effect size and response

(I filtered out rows with <= 5 observations)

# Summary table: for each (effect_size, response, correct) cell with more
# than 5 trials, report mean true d, empirical power (share of p <= .05),
# mean observed mean difference, and mean observed power.
all_data_sub %>%
  group_by(effect_size, response, correct) %>%
  summarize(
    n = n(),
    # -d because d in the dataset is calculated in the opposite direction!
    mean_d = mean(-d, na.rm = TRUE),
    power = mean(pvalue <= 0.05), 
    mean_obs_d = mean(obs_mean_dif, na.rm = TRUE), 
    obs_power = mean(obs_power, na.rm = TRUE)
  ) %>%
  mutate_if(is.double, round, 3) %>%
  filter(n > 5) %>%
  knitr::kable()
effect_size response correct n mean_d power mean_obs_d obs_power
-0.8 -1 1 62 -0.898 0.919 -0.910 0.816
-0.5 -1 1 48 -0.700 0.917 -0.678 0.736
-0.2 -1 1 20 -0.366 0.400 -0.359 0.448
-0.2 0 0 25 -0.113 0.040 -0.114 0.136
0.0 -1 0 30 -0.218 0.100 -0.213 0.280
0.0 0 1 101 0.006 0.010 0.006 0.115
0.0 1 0 31 0.284 0.161 0.280 0.328
0.2 0 0 22 0.106 0.136 0.108 0.212
0.2 1 1 25 0.435 0.400 0.409 0.486
0.5 0 0 13 0.358 0.538 0.365 0.423
0.5 1 1 46 0.632 0.848 0.636 0.709
0.8 1 1 58 0.988 0.966 0.954 0.848

Actual and observed d and power by effect size

(ignoring response)

# Same summary collapsed over response: per effect size, the share of
# correct judgments plus mean d, power, observed d, and observed power.
all_data_sub %>%
  group_by(effect_size) %>%
  summarize(
    n = n(),
    correct_pcnt = mean(correct),
    # -d because d in the dataset is calculated in the opposite direction!
    mean_d = mean(-d, na.rm = TRUE),
    power = mean(pvalue <= 0.05), 
    mean_obs_d = mean(obs_mean_dif, na.rm = TRUE), 
    obs_power = mean(obs_power, na.rm = TRUE)
  ) %>%
  mutate_if(is.double, round, 3) %>%
  knitr::kable()
effect_size n correct_pcnt mean_d power mean_obs_d obs_power
-0.8 70 0.886 -0.910 0.914 -0.920 0.816
-0.5 54 0.889 -0.674 0.870 -0.655 0.712
-0.2 47 0.426 -0.233 0.213 -0.231 0.279
0.0 162 0.623 0.018 0.056 0.018 0.187
0.2 49 0.510 0.278 0.265 0.266 0.353
0.5 60 0.767 0.566 0.767 0.571 0.639
0.8 58 1.000 0.988 0.966 0.954 0.848

Find the shortest interval covering X% of the data

(find_cover_region returns the narrowest interval of the sorted data that contains an alpha proportion of the observations — a highest-density-style cover region)

#' Shortest interval covering a given proportion of the data.
#'
#' Sorts `x` and slides a window of n - round((1 - alpha) * n) points over
#' it, returning the narrowest such window (a highest-density-style
#' cover region).
#'
#' @param x Numeric vector; NAs are dropped.
#' @param alpha Proportion of the data the interval must cover (default 0.5).
#' @return Length-2 numeric vector: lower and upper bound of the interval.
find_cover_region <- function(x, alpha = 0.5) {
  # sort() drops NAs by default, so n must be computed AFTER sorting; the
  # original computed n first, giving NA bounds whenever x had missing values
  x <- sort(x)
  n <- length(x)
  k <- as.integer(round((1 - alpha) * n))  # points left outside the window
  # candidate windows start at i = 1..k+1 and end at n-k+i-1; pick narrowest
  i <- which.min(x[seq.int(n - k, n)] - x[seq_len(k + 1L)])
  c(x[i], x[n - k + i - 1L])
}
# 50% cover regions (shortest interval holding half the trials) of the
# direction-corrected d, split by response
tapply(-all_data_sub$d, all_data_sub$response, find_cover_region)
## $`-1`
## [1] -0.8249109 -0.3589261
## 
## $`0`
## [1] -0.17304226  0.05205021
## 
## $`1`
## [1] 0.3677225 0.8761518
# Same for the observed mean difference
tapply(all_data_sub$obs_mean_dif, all_data_sub$response, find_cover_region)
## $`-1`
## [1] -0.8770313 -0.4127584
## 
## $`0`
## [1] -0.16894816  0.05487738
## 
## $`1`
## [1] 0.2943237 0.7974972

Cumulative means

Calculate cumulative means at each observation for each trial.

# Rebuild each trial's raw observation stream from the nested list-column
# and compute the running mean difference after each observation pair.
# NOTE(review): entries are classified as group markers via `val %in% c(1, 2)`
# and rows are paired via ceiling(obs_n/2) — this assumes the stream
# alternates groups and that no observed value is exactly 1 or 2; confirm
# against the response-file format.
cummeans <- all_data_sub %>%
  mutate(trial = row_number()) %>%
  unnest() %>%
  mutate(key = ifelse(val %in% c(1, 2), "grp", "val")) %>%
  group_by(trial, key) %>%
  mutate(obs_n = row_number()) %>%  # position within the grp / val streams
  ungroup() %>%
  spread(key, val) %>%  # one row per entry with grp and val columns
  mutate(obs_n = ceiling(obs_n/2),  # pair index: two rows per pair
         grp = recode(grp, "1" = "A", "2" = "B")) %>%
  spread(grp, val) %>%  # one row per pair, columns A and B
  mutate(val = B-A) %>%
  group_by(trial) %>%
  # running mean of the pairwise differences within each trial
  mutate(cummean = cumsum(val) / obs_n) %>%
  ungroup()

Plot cumulative means at each observation for effect size by response.

# One cumulative-mean trace per trial, faceted by effect size x response;
# grey line marks the true effect size.
# NOTE(review): the "Removed 295 rows" warning presumably reflects NA
# cummean values from incomplete pairs — confirm.
cummeans %>%
  ggplot(aes(obs_n, cummean, color = as.factor(trial))) +
  geom_hline(aes(yintercept = effect_size), color = "grey50") +
  geom_line(show.legend = FALSE) +
  facet_grid(effect_size~response, labeller = label_both)
## Warning: Removed 295 rows containing missing values (geom_path).

ggsave("cummean.png", width = 10, height = 10)
## Warning: Removed 295 rows containing missing values (geom_path).
# Same traces, binned by total decision length (observation pairs rounded
# up to the nearest multiple of binwidth), colored by response.
binwidth <- 10
cummeans %>%
  mutate(length_bin = ceiling(length/2/binwidth) * binwidth) %>%
  ggplot(aes(obs_n, cummean, group = as.factor(trial), color = as.factor(response))) +
  geom_hline(aes(yintercept = effect_size)) +
  geom_line(show.legend = FALSE) +
  facet_grid(effect_size~length_bin, labeller = label_both, scales = "free_x") +
  # semi-transparent blue / black / red for responses -1 / 0 / 1
  scale_color_manual(values = c(alpha("blue", .3), 
                                alpha("black", .3), 
                                alpha("red", .3))) +
  scale_x_continuous(breaks = seq(0, 150, by = binwidth)) +
  coord_cartesian(ylim = c(-2, 2)) +
  ggtitle("Cumulative means for decision length bins")
## Warning: Removed 295 rows containing missing values (geom_path).

ggsave("cummean_binned.png", width = 20, height = 15)
## Warning: Removed 295 rows containing missing values (geom_path).
# Same binned traces restricted to the null effect (effect_size == 0),
# faceted by response instead of effect size.
binwidth <- 10
cummeans %>%
  filter(effect_size == 0) %>%
  mutate(length_bin = ceiling(length/2/binwidth) * binwidth) %>%
  ggplot(aes(obs_n, cummean, group = as.factor(trial), color = as.factor(response))) +
  geom_hline(aes(yintercept = effect_size)) +
  geom_line(show.legend = FALSE) +
  facet_grid(response~length_bin, labeller = label_both, scales = "free_x") +
  scale_color_manual(values = c(alpha("blue", .3), 
                                alpha("black", .3), 
                                alpha("red", .3))) +
  scale_x_continuous(breaks = seq(0, 150, by = binwidth)) +
  coord_cartesian(ylim = c(-2, 2)) +
  ggtitle("Cumulative means for decision length bins where effect size = 0")
## Warning: Removed 97 rows containing missing values (geom_path).

ggsave("cummean_binned_0.png", width = 20, height = 15)
## Warning: Removed 97 rows containing missing values (geom_path).

Some model comparison of predictors of correct responses. “after_N” variables are how close the cumulative mean is to the effect size after 10, 20, 30, etc observation pairs. This is probably not a great analysis strategy.

# Build "after_N" predictors: for trials with at least maxlen observation
# pairs, the distance |cummean - effect_size| after 10, 20, ..., maxlen
# pairs, spread into one column per checkpoint.
# NOTE(review): this reuses (overwrites) the name `dat` from the data-prep
# section above — rename if the earlier `dat` is still needed.
maxlen <- 50

dat <- cummeans %>%
  filter(length/2 >= maxlen, obs_n %in% seq(10, maxlen, by = 10)) %>%
  mutate(obs_n = paste0("after_", obs_n),
         close = abs(cummean - effect_size)) %>%
  select(trial, effect_size, effect_size_abs, correct, obs_n, close) %>%
  spread(obs_n, close)


# Logistic model: does closeness to the true effect after 10 observation
# pairs predict a correct judgment, controlling for |effect size|?
mod10 <- glm(correct ~ effect_size_abs + after_10, family = binomial, data = dat)
summary(mod10)
## 
## Call:
## glm(formula = correct ~ effect_size_abs + after_10, family = binomial, 
##     data = dat)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5538  -1.3335   0.9106   1.0296   1.0339  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)
## (Intercept)      0.34721    0.26811   1.295    0.195
## effect_size_abs  0.62892    0.66069   0.952    0.341
## after_10         0.02099    0.54800   0.038    0.969
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 289.67  on 216  degrees of freedom
## Residual deviance: 288.74  on 214  degrees of freedom
## AIC: 294.74
## 
## Number of Fisher Scoring iterations: 4
# Same logistic model using the distance after 20 observation pairs
mod20 <- glm(correct ~ effect_size_abs + after_20, family = binomial, data = dat)
summary(mod20)
## 
## Call:
## glm(formula = correct ~ effect_size_abs + after_20, family = binomial, 
##     data = dat)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6020  -1.3215   0.9020   0.9921   1.3348  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)  
## (Intercept)       0.6582     0.2617   2.516   0.0119 *
## effect_size_abs   0.6109     0.6644   0.919   0.3579  
## after_20         -1.2130     0.7640  -1.588   0.1124  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 289.67  on 216  degrees of freedom
## Residual deviance: 286.20  on 214  degrees of freedom
## AIC: 292.2
## 
## Number of Fisher Scoring iterations: 4
# Same logistic model using the distance after 30 observation pairs
mod30 <- glm(correct ~ effect_size_abs + after_30, family = binomial, data = dat)
summary(mod30)
## 
## Call:
## glm(formula = correct ~ effect_size_abs + after_30, family = binomial, 
##     data = dat)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6424  -1.2585   0.8302   0.9518   1.3886  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       0.9335     0.2738   3.409 0.000651 ***
## effect_size_abs   0.6600     0.6747   0.978 0.327968    
## after_30         -3.0202     1.0622  -2.843 0.004463 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 289.67  on 216  degrees of freedom
## Residual deviance: 280.32  on 214  degrees of freedom
## AIC: 286.32
## 
## Number of Fisher Scoring iterations: 4
# Same logistic model using the distance after 40 observation pairs
mod40 <- glm(correct ~ effect_size_abs + after_40, family = binomial, data = dat)
summary(mod40)
## 
## Call:
## glm(formula = correct ~ effect_size_abs + after_40, family = binomial, 
##     data = dat)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7025  -1.2735   0.7998   0.9679   1.5374  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       1.0038     0.2785   3.604 0.000314 ***
## effect_size_abs   0.6876     0.6781   1.014 0.310553    
## after_40         -3.8297     1.2396  -3.089 0.002005 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 289.67  on 216  degrees of freedom
## Residual deviance: 278.48  on 214  degrees of freedom
## AIC: 284.48
## 
## Number of Fisher Scoring iterations: 4
# Same logistic model using the distance after 50 observation pairs
mod50 <- glm(correct ~ effect_size_abs + after_50, family = binomial, data = dat)
summary(mod50)
## 
## Call:
## glm(formula = correct ~ effect_size_abs + after_50, family = binomial, 
##     data = dat)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7047  -1.2604   0.8367   0.9672   1.6472  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       0.9085     0.2669   3.405 0.000663 ***
## effect_size_abs   0.6601     0.6715   0.983 0.325535    
## after_50         -3.6915     1.3021  -2.835 0.004581 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 289.67  on 216  degrees of freedom
## Residual deviance: 280.39  on 214  degrees of freedom
## AIC: 286.39
## 
## Number of Fisher Scoring iterations: 4
# compare models using AIC
library(AICcmodavg)

# Collect the candidate models directly instead of growing the list one
# element at a time; names are the checkpoint sizes (10, 20, ..., maxlen)
models <- list(mod10, mod20, mod30, mod40, mod50)
modelnames <- seq(10, maxlen, by = 10)

# Second-order AIC (AICc) table: which checkpoint best predicts correctness?
atab <- aictab(models, modelnames)
atab
## 
## Model selection based on AICc:
## 
##    K   AICc Delta_AICc AICcWt Cum.Wt      LL
## 40 3 284.59       0.00   0.55   0.55 -139.24
## 30 3 286.43       1.85   0.22   0.77 -140.16
## 50 3 286.50       1.91   0.21   0.99 -140.19
## 20 3 292.32       7.73   0.01   1.00 -143.10
## 10 3 294.85      10.26   0.00   1.00 -144.37